// GLSL-style spellings mapped onto OpenCL C: vec2/vec4 expand to the
// float2/float4 vector types, and rgb/rgba expand to the xyz/xyzw component
// selectors (so `v.rgb` becomes `v.xyz`).
// NOTE(review): rgb/rgba are not used in the code visible in this file.
#define vec2 float2
#define vec4 float4
#define rgb xyz
#define rgba xyzw

// Parameter block passed to the kernel through a __global pointer.
// NOTE(review): presumably the host fills a struct with the same layout;
// do not reorder or resize fields without checking the host side.
typedef struct
{
	// Only element 0 is read in this file (get_global_id0 multiplies it by
	// origROI[0]). Meaning of the other 7 entries is not visible here.
	int width[8];
	// Only element 0 is read in this file (get_global_id1 multiplies it by
	// origROI[1]). Meaning of the other 7 entries is not visible here.
	int height[8];
	// Not read by the code in this file; presumably the current playback
	// time - TODO confirm against the host.
	float cur_time;
	// Not read by the code in this file; presumably the total duration -
	// TODO confirm against the host.
	float total_time;
	// Source region of interest. [0],[1] are used as an (x, y) offset and
	// [2],[3] as an (x, y) scale applied to normalized coordinates in INPUTSRC.
	float origROI[4];
	// Result region of interest. The kernel reads [0],[1] as an origin and
	// adds [2],[3] to it to form the opposite corner.
	float resultROI[4];
	// Not read by the code in this file; presumably a rotation angle -
	// TODO confirm units and use.
	float angle;
}FilterParam;

// Shared sampler for every image read in this file: coordinates are
// normalized, out-of-range coordinates clamp to the edge, and filtering is
// nearest-neighbour.
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

// Returns the work-item index in dimension 0 minus origROI[0] * width[0].
// The subtraction is evaluated in float and converted to int on return.
static int get_global_id0(__global FilterParam* param)
{
	const float roiLeft = param->origROI[0];
	const int baseWidth = param->width[0];

	return get_global_id(0) - roiLeft * baseWidth;
}

// Returns the work-item index in dimension 1 minus origROI[1] * height[0].
// The subtraction is evaluated in float and converted to int on return.
static int get_global_id1(__global FilterParam* param)
{
	const float roiTop = param->origROI[1];
	const int baseHeight = param->height[0];

	return get_global_id(1) - roiTop * baseHeight;
}

// Samples src_data at tc after remapping tc into the source ROI:
// tc is scaled by (origROI[2], origROI[3]) and then shifted by
// (origROI[0], origROI[1]) before the image read.
vec4 INPUTSRC(image2d_t src_data, __global FilterParam* param, vec2 tc)
{
	const vec2 roiOrigin = (vec2)(param->origROI[0], param->origROI[1]);
	const vec2 roiExtent = (vec2)(param->origROI[2], param->origROI[3]);

	return read_imagef(src_data, sampler, tc * roiExtent + roiOrigin);
}

// Samples ovelay1 at tc with no ROI remapping. param is accepted but unused.
vec4 INPUT(image2d_t ovelay1,  __global FilterParam* param, vec2 tc)
{
	const vec2 coord = tc;

	return read_imagef(ovelay1, sampler, coord);
}

// Filter VideoWall
//
// Divides the output into a horizonetalCnts x verticalCnts grid of tiles.
//  - Tiles outside the middle region each show a point-sampled, shrunken copy
//    of the source: the source pixel is the in-tile offset multiplied by the
//    grid count on that axis.
//  - Tiles inside the middle region (middleHoriCnts x middleVetiCnts tiles,
//    centred in the grid) together show one stretched copy of the source.
//  - Pixels whose in-tile (or stretched) coordinate is 0 on either axis are
//    painted opaque black, which draws the grid lines.
// The wall colour is then blended with the unmodified source pixel using
// alpha / 100 as the weight of the wall colour.
//
// If the grid is degenerate (a count <= 0 or larger than the output size, or
// a middle region with zero extent) the tile arithmetic would divide by zero;
// in that case the source pixel is written through unchanged.
__kernel void filter_VideoWall(__read_only image2d_t image,   		// input image
							   __write_only image2d_t retImage,   	// image result
							   __private int horizonetalCnts,		// horizontal grids
							   __private int verticalCnts,			// vertical grids
							   __private int middleHoriCnts,		// middle horizontal grids
							   __private int middleVetiCnts,		// middle vertical grids
							   __private int alpha,					// blend factor, scope [0-100]
							   __global FilterParam* param)
{
	const int2 outPos = (int2)(get_global_id(0), get_global_id(1));

	// Work-items outside the input image have nothing to do.
	if(outPos.x >= get_image_width(image) || outPos.y >= get_image_height(image))
		return;

	// All layout arithmetic uses the NDRange size as the output resolution.
	const int width = get_global_size(0);
	const int height = get_global_size(1);
	const float2 resolution = (float2)(width, height);

	// Pixel position after subtracting the source ROI origin.
	const int w = get_global_id0(param);
	const int h = get_global_id1(param);

	// Unmodified source colour, sampled at the pixel centre.
	const vec2 uv = ((vec2)(w, h) + (vec2)(0.5f)) / resolution;
	const float4 color = INPUTSRC(image, param, uv);

	// Tile size in pixels. A non-positive count, or a count larger than the
	// output size, yields no usable tile size: pass the source through rather
	// than dividing by zero below.
	const int blockWidth = (horizonetalCnts > 0) ? width / horizonetalCnts : 0;
	const int blockHeight = (verticalCnts > 0) ? height / verticalCnts : 0;
	if(blockWidth <= 0 || blockHeight <= 0)
	{
		write_imagef(retImage, outPos, color);
		return;
	}

	// Which tile this pixel is in, and where inside that tile.
	const int blockIndex_x = w / blockWidth;
	const int blockIndex_y = h / blockHeight;
	int inBlock_x = w - blockIndex_x * blockWidth;
	int inBlock_y = h - blockIndex_y * blockHeight;

	// Half-open tile index range [start, end) of the centred middle region.
	const int midBlockStartIndex_x = (horizonetalCnts - middleHoriCnts) / 2;
	const int midBlockStartIndex_y = (verticalCnts - middleVetiCnts) / 2;
	const int midBlockEndIndex_x = midBlockStartIndex_x + middleHoriCnts;
	const int midBlockEndIndex_y = midBlockStartIndex_y + middleVetiCnts;

	// Linear source pixel index (row * width + column).
	int offset;

	if(blockIndex_x >= midBlockStartIndex_x && blockIndex_x < midBlockEndIndex_x &&
		blockIndex_y >= midBlockStartIndex_y && blockIndex_y < midBlockEndIndex_y)
	{
		// NOTE(review): the stretch below assumes a border exactly one tile
		// wide on every side (origin at blockWidth, extent width - 2 tiles),
		// regardless of midBlockStartIndex_*. Kept as-is; confirm intent.
		const int midBlockWidth = width - 2 * blockWidth;
		const int midBlockHeight = height - 2 * blockHeight;

		// A zero extent would make the stretch divide by zero.
		if(midBlockWidth == 0 || midBlockHeight == 0)
		{
			write_imagef(retImage, outPos, color);
			return;
		}

		// Stretch the position inside the middle region to the full frame.
		const int offx = width * ((float)(w - blockWidth) / midBlockWidth);
		const int offy = height * ((float)(h - blockHeight) / midBlockHeight);

		// The grid-line test below uses the stretched coordinate here.
		inBlock_x = offx;
		inBlock_y = offy;

		offset = offy * width + offx;
	}
	else
	{
		// Shrink: step through the source by the grid count per tile pixel.
		offset = (inBlock_y * verticalCnts) * width + inBlock_x * horizonetalCnts;
	}

	// Decode the linear index and sample at the texel centre (+0.5), matching
	// how uv is built above; sampling exactly on a texel edge with a nearest
	// filter can land on the neighbouring texel after float rounding.
	const vec2 srcPixel = (vec2)(offset % width, offset / width);
	float4 retColor = INPUTSRC(image, param, (srcPixel + (vec2)(0.5f)) / resolution);

	// First row/column of each tile (or of the stretched middle) is a grid line.
	if(inBlock_x == 0 || inBlock_y == 0)
		retColor = (float4)(0.0f, 0.0f, 0.0f, 1.0f);

	// alpha == 100 shows only the wall, alpha == 0 only the source.
	const float factor = (float)(alpha)/100.0f;
	const float4 ret = retColor * factor + (1.0f - factor) * color;

	write_imagef(retImage, outPos, ret);
}

